Back to Article
Notebook 04 - Sampling Map
Download Source

Notebook 04 - Sampling Map

Author

Sam Welch

Published

November 25, 2025

In [1]:
Code
library(STOPeData)
library(dplyr)

Attaching package: 'dplyr'
The following objects are masked from 'package:stats':

    filter, lag
The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union
Code
library(leaflet)
library(sf)
Linking to GEOS 3.13.1, GDAL 3.11.0, PROJ 9.6.0; sf_use_s2() is TRUE
Code
library(bslib)

Attaching package: 'bslib'
The following object is masked from 'package:utils':

    page
Code
library(eDataDRF)

Attaching package: 'eDataDRF'
The following objects are masked from 'package:STOPeData':

    abbreviate_string, altitude_units_vocabulary,
    analytical_protocols_vocabulary, areas_vocabulary,
    coordinate_systems_vocabulary, countries_vocabulary,
    dummy_parameters_vocabulary, environ_compartments_sub_vocabulary,
    environ_compartments_vocabulary, extraction_protocols_vocabulary,
    fractionation_protocols_vocabulary, gender_vocabulary,
    generate_protocol_id, geographic_features_sub_vocabulary,
    geographic_features_vocabulary, get_dataset_display_name,
    initialise_biota_tibble, initialise_campaign_tibble,
    initialise_compartments_tibble, initialise_CREED_data_tibble,
    initialise_CREED_scores_tibble, initialise_measurements_tibble,
    initialise_methods_tibble, initialise_parameters_tibble,
    initialise_references_tibble, initialise_samples_tibble,
    initialise_sites_tibble, lifestage_vocabulary,
    measured_categories_vocabulary, measured_flags_vocabulary,
    measured_types_vocabulary, parameter_types_sub_vocabulary,
    parameter_types_vocabulary, parameter_unit_vocabulary,
    protocol_categories_vocabulary, protocol_options_vocabulary,
    reference_character_limits, sampling_protocols_vocabulary,
    species_groups_vocabulary, species_names_vocabulary,
    tissue_types_vocabulary, uncertainty_types_vocabulary
Code
library(targets)

devtools::load_all()
ℹ Loading EXPECT_AEP_R_Project
In [2]:
Code
# load in any data we need from the targets workflow
literature_merged_data <- tar_read(load_literature_pqt)
literature_reference_data <- tar_read(reference_data)
literature_campaign_data <- tar_read(campaign_data)
literature_sites_data <- tar_read(sites_data)
literature_qc <- tar_read(data_quality_report)
wgs84_geo <- tar_read(wgs84_geography)

species_names <- eDataDRF::species_names_vocabulary()
species_lookup <- species_names |>
  with(setNames(SPECIES_COMMON_NAME, SPECIES_NAME))
In [3]:
Code

copper_toxicity_thresholds <- tar_read(copper_toxicity_thresholds)
# Define threshold colors based on the report ----
threshold_colours <- c(
  "Background (I)" = "#0070C0", # Blue
  "Good (II)" = "#00B050", # Green
  "Moderate (III)" = "#FFC000", # Yellow
  "Poor (IV)" = "#FF6600", # Orange
  "Very Poor (V)" = "#FF0000" # Red
)

compartment_colours <- c(
  # Aquatic ----
  "Freshwater" = "#4A90E2", # clear blue
  "Marine/Salt Water" = "#1E5A8E", # deep ocean blue
  "Brackish/Transitional Water" = "#6BA5C8", # mid-blue (between fresh/marine)
  "Groundwater" = "#2C5F7B", # darker, subdued blue
  "Wastewater" = "#8B7355", # murky brown
  "Liquid Growth Medium" = "#9FD8CB", # pale greenish-blue
  "Rainwater" = "#B3D9FF", # light sky blue
  "Stormwater" = "#607D8B", # grey-blue
  "Leachate" = "#6B5344", # dark muddy brown
  "Aquatic Sediment" = "#EBCF1B", # your existing yellow
  "Porewater" = "#7FA0B8", # muted blue-grey
  "Sludge" = "#67AB33", # your existing green

  # Atmospheric ----
  "Indoor Air" = "#E8F4F8", # very pale blue-grey
  "Outdoor Air" = "#87CEEB", # sky blue

  # Terrestrial ----
  "Terrestrial Biological Residue" = "#8B6F47", # organic brown
  "Soil H Horizon (Peat)" = "#3E2723", # very dark brown
  "Soil O Horizon (Organic)" = "#5D4037", # dark organic brown
  "Soil A Horizon (Topsoil)" = "#795548", # medium brown
  "Soil E Horizon (Mineral)" = "#A1887F", # light greyish-brown
  "Soil S Horizon (Mineral)" = "#BCAAA4", # pale brown-grey
  "Soil C Horizon (Parent Material)" = "#D7CCC8", # very pale brown
  "Soil R Horizon (Bedrock)" = "#9E9E9E", # grey

  # Biota ----
  "Biota, Terrestrial" = "#558B2F", # forest green
  "Biota, Aquatic" = "#00695C", # teal
  "Biota, Atmospheric" = "#B39DDB", # light purple (birds/insects)
  "Biota, Other" = "#9C27B0" # distinct purple
)

# Helper function for sample sizes
calculate_sample_sizes <- function(data) {
  data |>
    group_by(
      REFERENCE_ID,
      OCEAN_COUNTRY,
      SITE_GEOGRAPHIC_FEATURE,
      ENVIRON_COMPARTMENT_SUB
    ) |>
    summarise(total_n = sum(MEASURED_N, na.rm = TRUE), .groups = "drop")
}

# Create sub-compartment letter codes ----
subcomp_codes <- c(
  # Aquatic
  "Freshwater" = "F",
  "Marine/Salt Water" = "M",
  "Brackish/Transitional Water" = "B",
  "Groundwater" = "G",
  "Wastewater" = "WW",
  "Liquid Growth Medium" = "LGM",
  "Rainwater" = "R",
  "Stormwater" = "SW",
  "Leachate" = "L",
  "Aquatic Sediment" = "AS",
  "Porewater" = "P",
  "Sludge" = "Sl",
  # Atmospheric
  "Indoor Air" = "IA",
  "Outdoor Air" = "OA",
  # Terrestrial
  "Terrestrial Biological Residue" = "TBR",
  "Soil H Horizon (Peat)" = "H",
  "Soil O Horizon (Organic)" = "O",
  "Soil A Horizon (Topsoil)" = "A",
  "Soil E Horizon (Mineral)" = "E",
  "Soil S Horizon (Mineral)" = "S",
  "Soil C Horizon (Parent Material)" = "C",
  "Soil R Horizon (Bedrock)" = "R",
  # Biota
  "Biota, Terrestrial" = "BT",
  "Biota, Aquatic" = "BA",
  "Biota, Atmospheric" = "BAm",
  "Biota, Other" = "BO"
)

Geographic Distribution of Sampling Sites

In [4]:
Code

# Turn off spherical geometry ----
sf::sf_use_s2(FALSE)
Spherical geometry (s2) switched off
Code

# Prepare spatial data ----
data_sf <- literature_merged_data |>
  filter(
    !is.na(LATITUDE),
    !is.na(LONGITUDE),
    !is.na(MEASURED_OR_IMPUTED_VALUE_STANDARD)
  ) |>
  st_as_sf(
    coords = c("LONGITUDE", "LATITUDE"),
    crs = st_crs("WGS84"),
    remove = FALSE
  )

# Prepare apple data ----
number_of_sites <- nrow(municipality_centroids())
sales_data_split <- copper_oxide_sales() |>
  mutate(
    amount_kg = amount_kg / number_of_sites,
    average_kg = average_kg / number_of_sites
  )

apple_data <- cross_join(municipality_centroids(), sales_data_split) |>
  group_by(name, x, y) |>
  summarise(
    avg_copper_kg = mean(amount_kg, na.rm = TRUE),
    .groups = "drop"
  ) |>
  st_as_sf(
    coords = c("x", "y"),
    crs = st_crs("WGS84"),
    remove = FALSE
  )

environ_compartments_vocabulary <- environ_compartments_vocabulary() |>
  setdiff(c("Not relevant", "Not reported", "Other"))

# Create colour palette for main compartments ----
compartment_pal <- colorFactor(
  palette = palette.colors(n = 4, palette = "Tableau"),
  domain = environ_compartments_vocabulary
)

# Create colour palette for measured values ----
value_range <- range(data_sf$MEASURED_VALUE_STANDARD, na.rm = TRUE)
value_pal <- colorNumeric(
  palette = "viridis",
  domain = value_range,
  na.color = "gray"
)

# Add codes to data ----
data_sf <- data_sf |>
  mutate(
    SUBCOMP_CODE = subcomp_codes[ENVIRON_COMPARTMENT_SUB],
    SUBCOMP_CODE = ifelse(is.na(SUBCOMP_CODE), "?", SUBCOMP_CODE)
  )

# Define map colours ----
marine_colors <- list(
  default = "lightblue",
  highlight = "darkblue"
)

country_colours <- list(
  default = "lightgreen",
  highlight = "darkgreen"
)

# Calculate polygon centroids for labels ----
marine_centroids <- wgs84_geo$marine_polys |>
  filter(highlight_name) |>
  st_centroid()
Warning: st_centroid assumes attributes are constant over geometries
Warning in st_centroid.sfc(st_geometry(x), of_largest_polygon =
of_largest_polygon): st_centroid does not give correct centroids for
longitude/latitude data
Code

country_centroids <- wgs84_geo$countries |>
  filter(
    highlight_name,
    !name %in% c("Greenland", "Iceland", "Norway")
  ) |>
  st_centroid()
Warning: st_centroid assumes attributes are constant over geometries
Warning: st_centroid does not give correct centroids for longitude/latitude
data
Code

# Create study connection lines ----
study_lines <- data_sf |>
  st_drop_geometry() |>
  group_by(REFERENCE_ID) |>
  filter(n() > 1, CAMPAIGN_NAME_SHORT != "Vm_2025") |> # simply too many sites in Vm_2025 for polylines to be practical
  summarise(
    coords = list(cbind(LONGITUDE, LATITUDE)),
    .groups = "drop"
  )

# Convert to linestring sf object ----
study_lines_sf <- study_lines |>
  rowwise() |>
  mutate(
    geometry = list(st_linestring(coords))
  ) |>
  ungroup() |>
  select(-coords) |>
  st_sf(crs = st_crs("WGS84"))

@fig-sampling-map shows the spatial distribution of all sampling locations included in the analysis. This map helps visualise the geographic scope of copper monitoring in Arctic regions and identify areas with high or low sampling intensity.

In [5]:
Code

# Create map ----
map <- leaflet() |>
  addProviderTiles(providers$CartoDB.Positron) |>

  # Add ocean polygons (no popups) ----
  addPolygons(
    data = wgs84_geo$marine_polys |> filter(highlight_name),
    fillColor = ~ ifelse(
      highlight_name == TRUE,
      marine_colors[["highlight"]],
      marine_colors[["default"]]
    ),
    fillOpacity = ~ ifelse(highlight_name == TRUE, 0.3, 0),
    color = "black",
    weight = 1,
    group = "Geography"
  ) |>

  # Add country polygons (no popups) ----
  addPolygons(
    data = wgs84_geo$countries |> filter(highlight_name),
    fillColor = ~ ifelse(
      highlight_name == TRUE,
      country_colours[["highlight"]],
      country_colours[["default"]]
    ),
    fillOpacity = ~ ifelse(highlight_name == TRUE, 0.3, 0),
    weight = 1,
    color = "black",
    group = "Geography"
  ) |>

  # Add permanent labels for oceans ----
  addLabelOnlyMarkers(
    data = marine_centroids,
    label = ~name,
    labelOptions = labelOptions(
      noHide = TRUE,
      direction = "center",
      textOnly = TRUE,
      style = list(
        "color" = "darkblue",
        "font-weight" = "bold",
        "font-size" = "14px",
        "text-shadow" = "1px 1px 2px white, -1px -1px 2px white"
      )
    ),
    group = "Geography"
  ) |>

  # Add permanent labels for countries ----
  addLabelOnlyMarkers(
    data = country_centroids,
    label = ~name,
    labelOptions = labelOptions(
      noHide = TRUE,
      direction = "center",
      textOnly = TRUE,
      style = list(
        "color" = "darkgreen",
        "font-weight" = "bold",
        "font-size" = "12px",
        "text-shadow" = "1px 1px 2px white, -1px -1px 2px white"
      )
    ),
    group = "Geography"
  ) |>

  # Add study connection lines ----
  # addPolylines(
  #   data = study_lines_sf,
  #   color = "purple",
  #   weight = 2,
  #   opacity = 0.5,
  #   dashArray = "5, 5",
  #   label = ~ paste0("Study: ", REFERENCE_ID),
  #   labelOptions = labelOptions(
  #     style = list("font-weight" = "normal")
  #   ),
  #   group = "Study Connections"
  # ) |>

  # Add mine tailings markers ----
  addMarkers(
    data = mine_tailings_coords(),
    lng = ~LONGITUDE,
    lat = ~LATITUDE,
    icon = list(
      iconUrl = "https://upload.wikimedia.org/wikipedia/commons/8/8c/Mining_symbol.svg",
      iconSize = c(15, 15)
    ),
    popup = ~ paste0(
      "<b>Mine:</b> ",
      MINE_NAME,
      "<br>",
      "<b>Location:</b> ",
      SITE_NAME,
      "<br>",
      "<b>Ore Type:</b> ",
      ORE_TYPE,
      "<br>",
      "<b>Status:</b> ",
      ifelse(ACTIVE_2013, "Active (2013)", "Terminated"),
      "<br>",
      "<b>Details:</b> ",
      KEY_DETAILS,
      "<br>",
      "<b>Coordinate Confidence:</b> ",
      CONFIDENCE,
      "/5"
    ),
    group = "Mine Tailings"
  ) |>

  # apple markers... for apples
  addMarkers(
    data = apple_data,
    lng = ~x,
    lat = ~y,
    icon = list(
      iconUrl = "https://www.svgrepo.com/show/42619/apple-fruit.svg",
      iconSize = c(15, 15)
    ),
    popup = ~ paste0(
      "<b>Municipality:</b> ",
      name,
      "<br>",
      "<b>Average Copper Applied:</b> ",
      round(avg_copper_kg, 1),
      " kg/year",
      "<br>",
      "<b>Period:</b> 2020-2024"
    ),
    group = "Apple Production"
  ) |>

  # add case study sites
  addCircles(
    data = expect_case_studies(),
    lng = ~lon,
    lat = ~lat,
    radius = 100000, # 100km radius
    color = "hotpink",
    fillColor = "hotpink",
    fillOpacity = 0.3,
    opacity = 0.6,
    weight = 2,
    popup = ~ paste0(
      "<strong>",
      site_name,
      "</strong><br/>",
      "<em>Lat: ",
      lat,
      ", Lon: ",
      lon,
      "</em><br/><br/>",
      summary
    ),
    group = "Potential Case Studies"
  )

# Add layers for each main compartment ----
for (comp in environ_compartments_vocabulary) {
  data_subset <- data_sf |>
    filter(ENVIRON_COMPARTMENT == comp)

  if (nrow(data_subset) > 0) {
    # Build popup HTML with conditional species info
    popup_html <- if (comp == "Biota") {
      ~ paste0(
        "<b>Reference:</b> ",
        REFERENCE_ID,
        "<br>",
        "<b>Site:</b> ",
        SITE_CODE,
        "<br>",
        "<b>Date:</b> ",
        SAMPLING_DATE,
        "<br>",
        "<b>Compartment:</b> ",
        ENVIRON_COMPARTMENT,
        "<br>",
        "<b>Sub-compartment:</b> ",
        ENVIRON_COMPARTMENT_SUB,
        " (",
        SUBCOMP_CODE,
        ")",
        "<br>",
        "<b>Species:</b> ",
        SAMPLE_SPECIES,
        "<br>",
        "<b>Measured Value:</b> ",
        round(MEASURED_VALUE_STANDARD, 3),
        " ",
        MEASURED_UNIT_STANDARD
      )
    } else {
      ~ paste0(
        "<b>Reference:</b> ",
        REFERENCE_ID,
        "<br>",
        "<b>Site:</b> ",
        SITE_CODE,
        "<br>",
        "<b>Date:</b> ",
        SAMPLING_DATE,
        "<br>",
        "<b>Compartment:</b> ",
        ENVIRON_COMPARTMENT,
        "<br>",
        "<b>Sub-compartment:</b> ",
        ENVIRON_COMPARTMENT_SUB,
        " (",
        SUBCOMP_CODE,
        ")",
        "<br>",
        "<b>Measured Value:</b> ",
        round(MEASURED_VALUE_STANDARD, 3),
        " ",
        MEASURED_UNIT_STANDARD
      )
    }

    map <- map |>
      addCircleMarkers(
        data = data_subset,
        fillColor = ~ value_pal(MEASURED_VALUE_STANDARD),
        color = compartment_pal(comp),
        weight = 2,
        fillOpacity = 0.7,
        radius = 6,
        # label = ~SUBCOMP_CODE,
        # labelOptions = labelOptions(
        #   noHide = TRUE,
        #   direction = "center",
        #   textOnly = TRUE,
        #   style = list(
        #     "color" = compartment_pal(comp),
        #     "font-weight" = "normal",
        #     "font-size" = "8px"
        #   )
        # ),
        popup = popup_html,
        clusterOptions = markerClusterOptions(), # Helps with performance
        group = "Samples"
      )
  }
}

# Add site labels as separate toggleable layer ----
map <- map |>
  # addLabelOnlyMarkers(
  #   data = data_sf,
  #   label = ~SITE_CODE,
  #   labelOptions = labelOptions(
  #     noHide = FALSE,
  #     direction = "top",
  #     textOnly = TRUE,
  #     style = list(
  #       "color" = "black",
  #       "font-size" = "9px",
  #       "background" = "rgba(43, 42, 42, 0.7)",
  #       "padding" = "2px"
  #     )
  #   ),
  #   group = "Site Labels"
  # ) |>

  # Add layer controls ----
  addLayersControl(
    baseGroups = character(0),
    overlayGroups = c(
      "Geography",
      "Study Connections",
      "Mine Tailings",
      "Apple Production",
      "Samples",
      "Site Labels",
      "Potential Case Studies"
    ),
    options = layersControlOptions(collapsed = FALSE)
  ) |>

  # Add legends ----
  addLegend(
    position = "bottomright",
    pal = compartment_pal,
    values = environ_compartments_vocabulary,
    title = "Compartment<br>(border colour)",
    opacity = 1
  ) |>
  addLegend(
    position = "bottomleft",
    pal = value_pal,
    values = data_sf$MEASURED_VALUE_STANDARD,
    title = "Measured Value<br>(fill colour)",
    opacity = 0.8,
    labFormat = labelFormat(digits = 1)
  )

# Display map ----
bslib::card(
  full_screen = TRUE,
  map,
  style = "text-align: left; z-index: 999999;"
) # yes, of course, that makes perfect sense